
# ------------------------------------------------------------------------------------------------
### Function to create cutoff sample for a given dataset
# ------------------------------------------------------------------------------------------------
create_cutoff_sample <- function(election_data, wave = NULL) {
  # Load the municipality boundaries (includes population data)
  PL_gmina <- gisco_get_lau(country = "PL", year="2020") %>%
    mutate(id = paste0("", substr(LAU_ID, 5, 6), "", substr(LAU_ID, 10, nchar(LAU_ID))))
  
  # Prepare election data for joining
  if(wave == "w1w2") {
    election_data_join <- election_data %>% 
      select(id, any_level_any_type, wave_id) %>% 
      unique()
  } else {
    election_data_join <- election_data %>% 
      select(id, any_level_any_type) %>% 
      unique()
  }
  
  PL_gmina <- PL_gmina %>%
    left_join(., election_data_join, by = "id") 
  
  # Unite polygons within treated and control municipalities
  united_treated <- PL_gmina %>% 
    filter(any_level_any_type == 1, if(wave == "w1w2") wave_id == 1 else TRUE) %>%
    st_union()
  
  united_control <- PL_gmina %>% 
    filter(if(wave == "w1w2") wave_id %in% c(0,2) else any_level_any_type == 0) %>%
    st_union()
  
  # Convert united polygons into MultiLineStrings
  boundary_treated <- st_boundary(united_treated)
  boundary_control <- st_boundary(united_control)
  
  # Create the policy adoption boundary
  cutoff <- st_intersection(boundary_treated, boundary_control)
  
  # Calculate municipal centroids and distances to the boundary
  PL_gmina <- PL_gmina %>%
    mutate(centroid = st_centroid(`_ogr_geometry_`),
           dist = st_distance(centroid, cutoff),
           dist_km = as.numeric(dist / 1000))
  
  # Find municipalities within 50 km from the policy boundary
  cutoff_gminas <- PL_gmina %>% 
    filter(dist_km < 50)
  
  # Filter the election data to cutoff municipalities
  election_data %>% filter(id %in% cutoff_gminas$id) 
}

